{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>29</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>32</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>47</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>50</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating\n",
       "0       1        2     3.5\n",
       "1       1       29     3.5\n",
       "2       1       32     3.5\n",
       "3       1       47     3.5\n",
       "4       1       50     3.5"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load rating.csv, keeping only the three columns this analysis uses.\n",
    "rating_columns = ['userId', 'movieId', 'rating']\n",
    "df_data = pd.read_csv('./data/rating.csv', sep=',', usecols=rating_columns)\n",
    "# Preview the first five rows.\n",
    "df_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "296    67310\n",
       "356    66172\n",
       "318    63366\n",
       "593    63299\n",
       "480    59715\n",
       "Name: movieId, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many ratings each movieId received; value_counts() orders the\n",
    "# result from the most-rated to the least-rated movie.\n",
    "movie_ids = df_data['movieId']\n",
    "movie_rating_count = movie_ids.value_counts()\n",
    "# Show the five most-rated movies.\n",
    "movie_rating_count.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    67310\n",
      "1    66172\n",
      "2    63366\n",
      "3    63299\n",
      "4    59715\n",
      "Name: movieId, dtype: int64\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEWCAYAAABMoxE0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZxcZZ3v8c+3u9OdfW9iSAIJEHAAFSWGuAyzZBDEBcaBIW5Eb+7NiKj4cpwRZrwOOpcZ1OvGCDgoXgIqEHEhLlEZFh0RExoBQ4BIIJCERNKQhZC9u3/3j/NUUtWprq5eqquX7/tlverUc85zzvNUYf/yLOc5igjMzMy6q6baBTAzs4HNgcTMzHrEgcTMzHrEgcTMzHrEgcTMzHrEgcTMzHrEgcQMkPSnktZUuxwAkt4t6RfVLodZuRxIbECQ9LSk/ZImt0t/SFJImtmT80fEf0fECd0s2w2pbC9J2irpDkkvLzPvzFT+uryyfDsi3tSdspR5zXdJakrl3SxpuaQ3Vup6edcNScdV+jrW9xxIbCBZB7wz90HSK4AR1StOgc9FxGhgGvAscH2Vy1OUpI8BXwb+DZgCHAVcA5xTzXLZwOZAYgPJTcCFeZ8XAjfmHyBpnKQbJTVLekbSJyXVSGqQtF3SyXnHNkraI+kISX8uaWPeviMlfS+dZ52kj5RTwIjYAywFTsk711skPSjpRUkbJF2el+VX6X17aiG8TtL7JP06L39I+oCkJyRtk3S1JKV9tZK+IOn5VM4PtW/h5H83wGeAiyPi+xGxKyIORMSPIuIf0jENkr4saVN6fVlSQ9pXUK68sh2Xtm9IZfuJpJ2SVkg6Nu3L1fPhVM8Lyvk+bWBwILGB5LfAWEl/IqkWuAD4Vrtj/gMYBxwD/BlZ4Hl/ROwDvk9eiwb4W+CXEbEl/wSSaoAfAQ+TtTDmAx+VdGZnBZQ0Kl1jbV7yrlSO8cBbgIsknZv2nZ7ex0fE6Ii4r4NTvxV4LfCqVO5cWf4X8GaywPUa4NyiuTOvA4YDPyhxzD8D89L5XgXMBT5Z4vj23gl8GphA9h1cARARuXq+KtXz1i6c0/o5BxIbaHKtkjOAx8m6kYDsX+dkweWyiNgZEU8DXwDemw75DoWB5F0prb3XAo0R8ZmI2B8RTwFfBxaUKNfHJW0HdgJvzLsmEXFPRKyKiLaI+D1wM1mQ64orI2J7RKwH7uZQi+dvga9ExMaI2AZcWeIck4DnI6KlxDHvBj4TEVsiopksKLy3xPHtfT8iVqZrfDuvnDaIOZDYQHMTWQB4H+26tYDJQD3wTF7aM2StCoC7gBGSTpN0NNkfuWL/Oj8aODJ1hW1PAeKfyMYUOvJ/I2I8MBPYAxwcuE/Xuzt1k+0APpDK2hV/zNveDYxO20cCG/L25W+39wIwuVi3V54jOfz7O7IXymmDmAOJDSgR8QzZoPvZZF1V+Z4HDpAFgpyjSK2WiGgjG794J1kw+nFE7CxymQ3AuogYn/caExFnl1G+9cAlwFck5SYCfAdYBsyIiHHA1wDlsnR2zk5sBqbnfZ5R4tj7gL2U7v7axOHf36a0vQsYmdsh6WVdKqkNWg4kNhAtAv4yInblJ0ZEK1mguELSmNTq+BiF4yjfIev+ejfFu7UAVgIvSvqEpBFpQPtkSa8tp3ARcQfZH9/FKWkMsDUi9kqaSxbEcpqBNrIxne5YClwiaZqk8cAnSpRrB/Ap4GpJ50oaKWmYpDdL+lw67Gbgk2kiwuR0fO77exg4SdIpkoYDl3exrM/R/XpaP+ZAYgNORDwZEU0d7P4w2b+cnwJ+TRYsvpmXd0XafySwvIPztwJvI+v6WkfW0vkG2SB+uT4P/GOa8fRB4DOSdpL9YV6ad63dZAPS96ZutHlduAZkYze/AH4PPAj8FGgBWosdHBFfJAuunyQLYhuADwE/TIf8H6ApnW8V8LuURkT8gWzW138B
T5B9v11xObAk1fNvu5jX+jH5wVZmg4ekNwNfi4ijOz3YrJe4RWI2gKWut7Ml1UmaBvwLpaf3mvU6t0jMBjBJI4FfAi8nmy32E+CSiHixqgWzIcWBxMzMeqRiXVuSTlC2oF7u9aKkj0qaqGxRuyfS+4S8PJdJWitpTf5dxJJOlbQq7bsqb3mIBkm3pvQV6uHCfWZm1nV90iJJdxw/C5wGXEw2FfJKSZcCEyLiE5JOJJt6OJdsRs1/AcdHRKuklWRz839LNivlqohYLumDwCsj4gOSFgB/HREl1/CZPHlyzJw5s0I1NTMbnB544IHnI6Kx2L5Sd7j2pvnAkxHxjKRzgD9P6UuAe8jmvp8D3JLWRFonaS0wV9LTwNjcGkSSbiS7oWp5ynN5OtdtwFclKUpEx5kzZ9LU1NHMUTMzK0bSMx3t66tZWwvIWhsAUyJiM0B6PyKlT6NweYeNKW1a2m6fXpAnre2zg2w9oQKSFit7/kJTc3Nzr1TIzMwyFQ8kkuqBtwPf7ezQImlRIr1UnsKEiOsiYk5EzGlsLNoyMzOzbuqLFsmbgd9FxHPp83OSpgKk99wS3hspXCdoOtkyExspXEsol16QJy1ENw7YWoE6mJlZB/oikLyTQ91akC1etzBtLwRuz0tfkGZizQJmAytT99dOSfPSbK0L2+XJnes84K5S4yNmZtb7KjrYnm6WOgP4u7zkK4GlkhYB64HzASJitaSlwKNkawVdnNY8ArgIuIHssarLObRG0vXATWlgfiulnxdhZmYVMORuSJwzZ0541paZWddIeiAi5hTb57W2zMysRxxIynT/01v54i/WsL+lrdpFMTPrVxxIyvTAM9u46q61tLQ5kJiZ5XMgKdPB56IOrSElM7NOOZCUqSZbJ7LHD9g2MxtsHEjKlOIIbW6SmJkVcCDpIscRM7NCDiRlUq5J4kBiZlbAgaRMBwfbHUnMzAo4kJTpYIPEccTMrIADSZkOtUjMzCyfA0mZamrS9F83SczMCjiQlCnXImlzHDEzK+BAUq6DNyQ6kpiZ5XMgKdPBZ/o6jpiZFXAgKZNvIzEzK86BpEwiN9he5YKYmfUzDiRlOtQicSQxM8vnQFKmGt+QaGZWlANJmXJdW17918yskANJudwiMTMrqqKBRNJ4SbdJelzSY5JeJ2mipDskPZHeJ+Qdf5mktZLWSDozL/1USavSvquUluKV1CDp1pS+QtLMitWlUic2MxvgKt0i+Qrws4h4OfAq4DHgUuDOiJgN3Jk+I+lEYAFwEnAWcI2k2nSea4HFwOz0OiulLwK2RcRxwJeAz1aqIrll5N0iMTMrVLFAImkscDpwPUBE7I+I7cA5wJJ02BLg3LR9DnBLROyLiHXAWmCupKnA2Ii4L7KFrm5slyd3rtuA+Tr44JBerk9696wtM7NClWyRHAM0A/9P0oOSviFpFDAlIjYDpPcj0vHTgA15+TemtGlpu316QZ6IaAF2AJPaF0TSYklNkpqam5u7VZma9E25RWJmVqiSgaQOeA1wbUS8GthF6sbqQLGWRJRIL5WnMCHiuoiYExFzGhsbS5e6w8J51paZWTGVDCQbgY0RsSJ9vo0ssDyXuqtI71vyjp+Rl386sCmlTy+SXpBHUh0wDtja6zXBS6SYmXWkYoEkIv4IbJB0QkqaDzwKLAMWprSFwO1pexmwIM3EmkU2qL4ydX/tlDQvjX9c2C5P7lznAXdFhR8Y4gaJmVmhugqf/8PAtyXVA08B7ycLXkslLQLWA+cDRMRqSUvJgk0LcHFEtKbzXATcAIwAlqcXZAP5N0laS9YSWVCpihwaw3ckMTPLV9FAEhEPAXOK7JrfwfFXAFcUSW8CTi6SvpcUiCrtYBhxHDEzK+A728vkMRIzs+IcSMpU4xsSzcyKciAp06FntjuSmJnlcyApk7xoo5lZUQ4kZUtdWx4lMTMr4EBSJrdIzMyKcyApk5eRNzMrzoGkTF5G3sysOAeSMh18ZrvHSMzMCjiQ
lCk3RtLmOGJmVsCBpEy5ZeQrvCakmdmA40BSLi+RYmZWlANJmbxoo5lZcQ4kZfIy8mZmxTmQlKnGNySamRXlQFKmQ89sr3JBzMz6GQeSMh1aIsWRxMwsnwNJmTxCYmZWnANJuTxGYmZWlANJmeRl5M3MiqpoIJH0tKRVkh6S1JTSJkq6Q9IT6X1C3vGXSVoraY2kM/PST03nWSvpKqW5uJIaJN2a0ldImlm5uqQNxxEzswJ90SL5i4g4JSLmpM+XAndGxGzgzvQZSScCC4CTgLOAayTVpjzXAouB2el1VkpfBGyLiOOALwGfrVQlDj6zvVIXMDMboKrRtXUOsCRtLwHOzUu/JSL2RcQ6YC0wV9JUYGxE3BfZlKkb2+XJnes2YL4O3TnYqw4t2uhQYmaWr9KBJIBfSHpA0uKUNiUiNgOk9yNS+jRgQ17ejSltWtpun16QJyJagB3ApPaFkLRYUpOkpubm5m5VxEukmJkVV1fh878hIjZJOgK4Q9LjJY4t1pKIEuml8hQmRFwHXAcwZ86cboUCedFGM7OiKtoiiYhN6X0L8ANgLvBc6q4ivW9Jh28EZuRlnw5sSunTi6QX5JFUB4wDtlaiLngZeTOzoioWSCSNkjQmtw28CXgEWAYsTIctBG5P28uABWkm1iyyQfWVqftrp6R5afzjwnZ5cuc6D7grKvSX3i0SM7PiKtm1NQX4QRr7rgO+ExE/k3Q/sFTSImA9cD5ARKyWtBR4FGgBLo6I1nSui4AbgBHA8vQCuB64SdJaspbIgkpV5mAfmiOJmVmBigWSiHgKeFWR9BeA+R3kuQK4okh6E3BykfS9pEBUaYem/zqSmJnl853tZTo4/betuuUwM+tvHEjKdGiJFDMzy+dAUiYvI29mVpwDSRc5jJiZFXIgKZO8jLyZWVEOJGWq8fK/ZmZFOZCU6dCijdUth5lZf+NAUqaDs7YcSMzMCjiQlOnQEimOJGZm+RxIyuRl5M3MinMgKZMXbTQzK86BpGxeRt7MrBgHkjLVVOQBvmZmA58DSZlyj4L3M9vNzAp1GkjSc0PyP9dK+pfKFal/8mC7mVlx5bRI5kv6qaSpkk4GfguMqXC5+h0vkWJmVlynD7aKiHdJugBYBewG3hkR91a8ZP2Ml5E3MyuunK6t2cAlwPeAp4H3ShpZ4XL1O15G3sysuHK6tn4EfCoi/g74M+AJ4P6KlqofcxgxMytUzjPb50bEiwCR/XP8C5KWVbZY/U9Nje9INDMrppwWyQhJ10v6GYCkE4HTy71AmuX1oKQfp88TJd0h6Yn0PiHv2MskrZW0RtKZeemnSlqV9l2lNBdXUoOkW1P6Ckkzyy1XV+VmbXn6r5lZoXICyQ3Az4Gp6fMfgI924RqXAI/lfb4UuDMiZgN3ps+5ALUAOAk4C7hGUm3Kcy2wGJidXmel9EXAtog4DvgS8NkulKtLvESKmVlx5QSSyRGxFGgDiIgWoLWck0uaDrwF+EZe8jnAkrS9BDg3L/2WiNgXEeuAtcBcSVOBsRFxX+pau7Fdnty5biObqlyRe9C9jLyZWXHlBJJdkiaR/jEuaR6wo8zzfxn4R1IQSqZExGaA9H5ESp8GbMg7bmNKm5a226cX5EkBbgcwqX0hJC2W1CSpqbm5ucyitz9H9u5l5M3MCpUTSD4GLAOOlXQvWYvgw51lkvRWYEtEPFBmWYq1JKJEeqk8hQkR10XEnIiY09jYWGZxihfOLRIzs0Ll3JD4O0l/BpxA9vd0TUQcKOPcbwDeLulsYDgwVtK3gOckTY2Izanbaks6fiMwIy//dGBTSp9eJD0/z0ZJdcA4YGsZZes6j5GYmRXVYYtE0jtyL+DtZIHkeOBtKa2kiLgsIqZHxEyyQfS7IuI9ZK2bhemwhcDtaXsZsCDNxJpFNqi+MnV/7ZQ0L41/XNguT+5c56VrVORvfY28jLyZWTGlWiRvS+9HAK8H7kqf/wK4B/h+N695JbA0LQa5HjgfICJWS1oK
PAq0ABdHRG5Q/yKy2WMjgOXpBXA9cJOktWQtkQXdLFOn3LVlZlZch4EkIt4PkO7/ODE3QJ66o67uykUi4h6y4ENEvADM7+C4K4AriqQ3AScXSd9LCkSVJrdIzMyKKmewfWYuiCTPkXVxDSkHWyRVLYWZWf9TzhIp90j6OXAz2d/RBcDdFS1VP+Rl5M3Miitn1taH0uD6n6ak6yLiB5UtVv/jZeTNzIorp0VCRHyf7g+uDwpKnYAeIzEzK1TO80jekRZY3CHpRUk7Jb3YF4XrTzxry8ysuHJaJJ8D3hYRj3V65CB2cNaWO7fMzAqUM2vruaEeRMAtEjOzjpTTImmSdCvwQ2BfLjGNmwwZXkbezKy4cgLJWGA38Ka8tGCIDb57GXkzs+LKmf77/r4oSH/nZeTNzIrrMJBI+g9K9ORExEcqUqJ+yjckmpkVV6pF0tRnpRgA6muzeQn7W9o6OdLMbGgptWjjko72DUWSGFYrDrQ6kJiZ5Stn+q8lw2prHEjMzNpxIOmCLJB4kMTMLJ8DSRcMq61hv1skZmYFyllr63hJd0p6JH1+paRPVr5o/U99rTjgwXYzswLltEi+DlwGHACIiN9TwUfa9mfD6jxGYmbWXjmBZGRErGyX1lKJwvR37toyMztcOYHkeUnHkm5OlHQesLl0lsGpvrbG95GYmbVTzlpbFwPXAS+X9CywDnhPRUvVT9XX1bDPgcTMrECnLZKIeCoi/gpoBF4eEW+MiKc7yydpuKSVkh6WtFrSp1P6REl3pIdl3SFpQl6eyyStlbRG0pl56adKWpX2XaX0cBBJDZJuTekrJM3s8jfQBfV1bpGYmbVXaq2t90TEtyR9rF06ABHxxU7OvQ/4y4h4SdIw4NeSlgPvAO6MiCslXQpcCnxC0olkg/gnAUcC/yXp+IhoBa4FFgO/BX4KnAUsBxYB2yLiOEkLgM8CF3TtKyhfQ10NL+0bksNDZmYdKtUiGZXex3TwKikyL6WPw9IrgHOA3PIrS4Bz0/Y5wC0RsS8i1gFrgbmSpgJjI+K+yB6YfmO7PLlz3QbMz7VWKsFjJGZmhyu11tZ/ps1rIqK5OyeXVAs8ABwHXB0RKyRNiYjN6RqbJR2RDp9G1uLI2ZjSDqTt9um5PBvSuVok7QAmAc+3K8dishYNRx11VHeqArhry8ysmHJmbf1G0i8kLcofzyhHRLRGxCnAdLLWxcklDi/WkogS6aXytC/HdRExJyLmNDY2dlbsDtX7PhIzs8OUM9g+G/gk2djFA5J+LKlLs7YiYjtwD9nYxnOpu4r0viUdthGYkZdtOrAppU8vkl6QR1IdMA7Y2pWydcUwd22ZmR2mrLW2ImJlRHwMmEv2h7rTJeYlNUoan7ZHAH8FPA4sAxamwxYCt6ftZcCCNBNrFjAbWJm6wXZKmpfGPy5slyd3rvOAu9I4SkXU1/mGRDOz9jq9j0TSWOCvyWZUHQv8gCygdGYqsCSNk9QASyPix5LuA5ZKWgSsB84HiIjVkpYCj5LdOX9xmrEFcBFwAzCCbLbW8pR+PXCTpLVkAa6iS7fU1/o+EjOz9sq5IfFh4IfAZyLivnJPnNbkenWR9BeA+R3kuQK4okh6E3DY+EpE7CUFor7Q4MF2M7PDlBNIjomIkDRG0ui8Kb1DTq5rKyKo4CxjM7MBpZwxkpMkPQg8Ajwq6YFOZl8NWvW1NURAS5sfbmVmllNOILkO+FhEHB0RRwF/n9KGnPq67Oty95aZ2SHlBJJREXF37kNE3MOhu96HlFwg8b0kZmaHlDNG8pSk/w3clD6/h2wF4CHHLRIzs8OV0yL5H2Qr/36fbOpvI/D+Shaqv6qvzb4uTwE2Mzuk0xZJRGwDPtIHZen3DrZI3LVlZnZQqWXkl5XKGBFv7/3i9G+5Fom7tszMDinVInkd2cq6NwMrKL5A4pDiMRIzs8OVCiQvA84A3gm8C/gJcHNErO6LgvVH7toyMztch4PtaQn4n0XEQmAe2YOm7pH0
4T4rXT/jri0zs8OVHGyX1AC8haxVMhO4imz21pDkFomZ2eFKDbYvIVsocTnw6Yh4pM9K1U95jMTM7HClWiTvBXYBxwMfyVukUGSPZB9b4bL1O6Mbsq9r596WKpfEzKz/KPXM9rIeejWUjB9RD8COPQeqXBIzs/7DwaILhtdnX9feA62dHGlmNnQ4kHRBfW0NNXIgMTPL50DSBZIYMayWPfsdSMzMchxIumhEfR27HEjMzA5yIOmiscPr2LnXg+1mZjkVCySSZki6W9JjklZLuiSlT5R0h6Qn0vuEvDyXSVoraY2kM/PST5W0Ku27SmkusqQGSbem9BWSZlaqPjljhtfxoqf/mpkdVMkWSQvw9xHxJ2RLrFws6UTgUuDOiJgN3Jk+k/YtAE4CzgKukVSbznUtsBiYnV5npfRFwLaIOA74EvDZCtYHgLEjhvGip/+amR1UsUASEZsj4ndpeyfwGDANOAdYkg5bApybts8BbomIfRGxjmxtr7mSpgJjI+K+iAjgxnZ5cue6DZivvDsnK2FkfS2797tFYmaW0ydjJKnL6dVky9FPiYjNkAUb4Ih02DSyZetzNqa0aWm7fXpBnohoAXYAk4pcf7GkJklNzc3NParLqIY6du3zYLuZWU7FA4mk0cD3gI9GxIulDi2SFiXSS+UpTIi4LiLmRMScxsbGzopc0tjhw3jRg+1mZgdVNJBIGkYWRL4dEblVg59L3VWk9y0pfSMwIy/7dGBTSp9eJL0gj6Q6YBywtfdrcsjY4XW8tK+FtrbD4pWZ2ZBUyVlbAq4HHouIL+btWgYsTNsLgdvz0hekmVizyAbVV6bur52S5qVzXtguT+5c5wF3pXGUihkzfBgRsMvjJGZmQCfPI+mhN5CtILxK0kMp7Z+AK4GlkhYB64HzASJitaSlwKNkM74ujojcYMRFwA3ACLJl7Zen9OuBmyStJWuJLKhgfYBs+i/Ai3tbGDN8WKUvZ2bW71UskETEr+n4Oe/zO8hzBXBFkfQmsmejtE/fSwpEfWV0CiQv+V4SMzPAd7Z3We6ZJC/t84C7mRk4kHRZrjvLd7ebmWUcSLpo7HA/JdHMLJ8DSReNSl1bu/Y5kJiZgQNJl43xYLuZWQEHki4a3VBHjfzcdjOzHAeSLpLE5NENNO/cV+2imJn1Cw4k3XDk+BFs3L672sUwM+sXHEi64ZjJo3iqeVe1i2Fm1i84kHTDkeNHsGXnPlpa26pdFDOzqnMg6YaXjRtOa1vw/Ev7q10UM7OqcyDphqMmjgTgqeaXqlwSM7PqcyDphpdPHQPAE1scSMzMHEi6oXF0AyPra3nmBc/cMjNzIOkGSbxs3HA279hT7aKYmVWdA0k3HT1xJOue9xRgMzMHkm46fsoYnmrexQFPATazIc6BpJtOmjaO/a1trPnjzmoXxcysqhxIuunEqWMBeNyBxMyGOAeSbpo5aSQNdTU8tvnFahfFzKyqKhZIJH1T0hZJj+SlTZR0h6Qn0vuEvH2XSVoraY2kM/PST5W0Ku27SpJSeoOkW1P6CkkzK1WXYupqazi2cbS7tsxsyKtki+QG4Kx2aZcCd0bEbODO9BlJJwILgJNSnmsk1aY81wKLgdnplTvnImBbRBwHfAn4bMVq0oFjjxjN0y945paZDW0VCyQR8Stga7vkc4AlaXsJcG5e+i0RsS8i1gFrgbmSpgJjI+K+iAjgxnZ5cue6DZifa630lROnjmXjtj1s3eU1t8xs6OrrMZIpEbEZIL0fkdKnARvyjtuY0qal7fbpBXkiogXYAUyqWMmLOPXorGeu6en28dLMbOjoL4PtxVoSUSK9VJ7DTy4tltQkqam5ubmbRTzcK6ePo76uhpXrHEjMbOjq60DyXOquIr1vSekbgRl5x00HNqX06UXSC/JIqgPGcXhXGgARcV1EzImIOY2Njb1UFRg+rJZXzxjPCgcSMxvC+jqQLAMWpu2FwO156QvSTKxZZIPqK1P3105J89L4x4Xt8uTO
dR5wVxpH6VPzjpnE6k072LHnQF9f2sysX6jk9N+bgfuAEyRtlLQIuBI4Q9ITwBnpMxGxGlgKPAr8DLg4IlrTqS4CvkE2AP8ksDylXw9MkrQW+BhpBlhfe+PsybQF3LNmS+cHm5kNQqrCP+Kras6cOdHU1NRr52trC07//N3MmDCSmxfP67Xzmpn1J5IeiIg5xfb1l8H2AaumRrzrtKO476kXeNJPTDSzIciBpBecf+oM6mrEzSvWV7soZmZ9zoGkFzSOaeDMk1/Gbb/byN4DrZ1nMDMbRBxIesm75x7F9t0H+MWjz1W7KGZmfcqBpJecdswkjhw3nFtWunvLzIYWB5JeUlsjFr5+Jr958gV+8+Tz1S6OmVmfcSDpRQtfP5MZE0fwqdtX+xG8ZjZkOJD0ouHDarn8bSexdstLfOO/11W7OGZmfcKBpJfN/5MpnHnSFL54xxoeeXZHtYtjZlZxDiQV8O/veCUTR9XzwW//juad+6pdHDOzinIgqYCJo+q55t2n0rxzH4tvavK9JWY2qDmQVMipR0/gSxe8ioc2bOeSWx704LuZDVoOJBV01slT+dRbT+Tnq59j8Y1N7NrXUu0imZn1OgeSCnv/G2bxr+eezC//0Mzbv/prHt30YrWLZGbWqxxI+sB75x3Ntxadxs69LZx79b3ccO86htry/WY2eDmQ9JHXHzeZ5Zf8KW+cPZnLf/Qo77j2Nzy0YXu1i2Vm1mMOJH1o0ugGrl84h8+f90rWv7Cbc6++lw/c9IC7u8xsQKurdgGGGkmcP2cGb37FVL7+q6e4/tfr+NnqPzLvmIlc8NoZnHHiyxjd4J/FzAYOP2q3yrbt2s/N96/nOyvWs3HbHupqxMnTxnHarIm8dmb2GjdyWLWLaWZDXKlH7TqQ9BNtbcED67dxz5otrFy3lYc37GB/axsSnDBlDHNnTeSV08dz/JTRHD9lDMOH1Va7yGY2hDiQ5OmvgaS9vQdaeXjDdlau28rKp7fywDPb2L0/u0NegiPHjeCoiSOZ1TiKI8cN58jx2efpE0YyeXQ9dbUe/jKz3lMqkAz4znhJZwFfAWqBb0TElVUuUq8YPqyW046ZxGnHTAKgpbWN9Vt38/gfd7LmjztZv3U3657fxfJVmzjCAvMAAAmeSURBVNm2+0BBXgkmjqyncUwDjWMaGD+ynnEj6pgwsp4JI+sZP3IY40cOY3TDMEY11DKmYRgj6msZmV6SqlFlMxugBnQgkVQLXA2cAWwE7pe0LCIerW7Jel9dbQ3HNI7mmMbRnP2KqQX79h5o5dnte1i/dTfPbtvDlp37aE6v51/ax4atu9m+5wA79hygnAboiGG1jGqopaGuloZhNQyvq2VEfS0NdTXU19VQX1tDw7Ds87DamoPpw2rFsNqa9BJ1NYfS6mprqKsRtTWirkYHP9fVilqJmrSvRtl7lsbB42t06CWRHS9Ro2wCQ43I9tccShNZUBVZnoJtOHguB06znhnQgQSYC6yNiKcAJN0CnAMMukBSyvBhtRzbOJpjG0eXPK6tLdix5wDb9xxg++797NrXykv7DvDSvlb27G9h1/5Wdu9rYff+VnYfaGXvgVb2HWhj74FW9ra0sr+ljV37WtjX0pa9DrSyvzXY39LK/tY29re00TaAe0oPCzBkCYVpKUil7Y7P1fHejnZ1+3xdvE5nV+te+UpdqfjO0nlKXavrgb/ktTqsb9e/oyxfqWt18F2UyFNqZ8e/ffE9l8yfzdtedWSpq3XLQA8k04ANeZ83Aqe1P0jSYmAxwFFHHdU3JeuHamrEhFH1TBhVD4yqyDXa2oIDbW20tAYtrcH+1jZacp/bgta2Nlrasn0HWttSWtDWFrRG2o6gtY2D2y1tQUS23dZG9h5BW3Dove1QWu7YCAhI7+lzu/S2CILsQ/u0XL70P9raCo/pjo7GJEudrdSlooOcpfN051od5yp5rQ72dVTuTs/XrTxdP2Hp76hE2Uvm66NrlTjhuBGVmQE6
0ANJsbB72NcYEdcB10E22F7pQg1lNTWioaYW3wpjNnQM9Kk9G4EZeZ+nA5uqVBYzsyFpoAeS+4HZkmZJqgcWAMuqXCYzsyFlQHdARESLpA8BPyeb/vvNiFhd5WKZmQ0pAzqQAETET4GfVrscZmZD1UDv2jIzsypzIDEzsx5xIDEzsx5xIDEzsx4Zcqv/SmoGnulm9snA871YnP7IdRwcXMfBoT/V8eiIaCy2Y8gFkp6Q1NTRMsqDhes4OLiOg8NAqaO7tszMrEccSMzMrEccSLrmumoXoA+4joOD6zg4DIg6eozEzMx6xC0SMzPrEQcSMzPrEQeSMkk6S9IaSWslXVrt8nSFpKclrZL0kKSmlDZR0h2SnkjvE/KOvyzVc42kM/PST03nWSvpKlXxYeeSvilpi6RH8tJ6rU6SGiTdmtJXSJrZl/VLZShWx8slPZt+y4cknZ23byDWcYakuyU9Jmm1pEtS+qD5LUvUcfD8ltmjR/0q9SJbov5J4BigHngYOLHa5epC+Z8GJrdL+xxwadq+FPhs2j4x1a8BmJXqXZv2rQReR/ZkyuXAm6tYp9OB1wCPVKJOwAeBr6XtBcCt/aSOlwMfL3LsQK3jVOA1aXsM8IdUl0HzW5ao46D5Ld0iKc9cYG1EPBUR+4FbgHOqXKaeOgdYkraXAOfmpd8SEfsiYh2wFpgraSowNiLui+y/1hvz8vS5iPgVsLVdcm/WKf9ctwHz+7oF1kEdOzJQ67g5In6XtncCjwHTGES/ZYk6dmTA1dGBpDzTgA15nzdS+j+E/iaAX0h6QNLilDYlIjZD9h86cERK76iu09J2+/T+pDfrdDBPRLQAO4BJFSt513xI0u9T11euy2fA1zF1x7waWMEg/S3b1REGyW/pQFKeYpF9IM2bfkNEvAZ4M3CxpNNLHNtRXQfyd9CdOvXX+l4LHAucAmwGvpDSB3QdJY0Gvgd8NCJeLHVokbQBUc8idRw0v6UDSXk2AjPyPk8HNlWpLF0WEZvS+xbgB2Rddc+lpjLpfUs6vKO6bkzb7dP7k96s08E8kuqAcZTfzVQxEfFcRLRGRBvwdbLfEgZwHSUNI/sD++2I+H5KHlS/ZbE6Dqbf0oGkPPcDsyXNklRPNpi1rMplKoukUZLG5LaBNwGPkJV/YTpsIXB72l4GLEizQGYBs4GVqXthp6R5qe/1wrw8/UVv1in/XOcBd6V+6arK/XFN/prst4QBWsdUpuuBxyLii3m7Bs1v2VEdB9Vv2Zcj+wP5BZxNNtviSeCfq12eLpT7GLIZIA8Dq3NlJ+s/vRN4Ir1PzMvzz6mea8ibmQXMIfuP/Ungq6SVEapUr5vJugMOkP1rbFFv1gkYDnyXbKBzJXBMP6njTcAq4PdkfzymDvA6vpGsC+b3wEPpdfZg+i1L1HHQ/JZeIsXMzHrEXVtmZtYjDiRmZtYjDiRmZtYjDiRmZtYjDiRmZtYjDiRmeSS1ppVYH5H0I0njOzn+lHartr5dvbg6tKSPS3o8ledhSRf21rnT+cdL+mBvntOGHgcSs0J7IuKUiDiZ7M7gizs5/hSyewIAiIhlEXFlbxRE0geAM4C5qTynU3wpjJ4YT7ZyrFm3OZCYdew+0qJ4kuZK+o2kB9P7CWmVg88AF6RWzAWS3ifpqynPDemZEb+R9JSk81J6jaRrlD2b4seSfprb184/AR+MtPZUROyIiCXpHPNTWValBf8aUvrTkian7TmS7knbl6fj7kll+Ui6xpXAsan8n6/M12iDnQOJWRGSaoH5HFoK53Hg9Ih4NfAp4N8ie6TAp8ie/XBKRNxa5FRTye5sfivZH22AdwAzgVcA/5Ps+RLtrz8GGBMRTxbZNxy4AbggIl4B1AEXlVGtlwNnkq3p9C9p/adLgSdT+f+hjHOYHcaBxKzQCEkPAS8AE4E7Uvo44LvKnlb4JeCkMs/3w4hoi4hHgSkp7Y3Ad1P6H4G7i+QTHa/eegKwLiL+kD4vIev26sxPInvG
xfNkiyBO6SyDWTkcSMwK7YmIU4CjyZ6GmRsj+Vfg7jRW8TaytY3KsS9vW+3eO5S6s3ZJOqbI7lL5Wzj0/+v2ZcwvSytZS8asxxxIzIqIiB3AR4CPpy6gccCzaff78g7dSfb41K74NfA3aaxkCvDnHRz378DVksYCSBqbHkz2ODBT0nHpuPcCv0zbTwOnpu2/KaMs3Sm/WQEHErMORMSDZKsmLyB7hvi/S7oXqM077G7gxNxge5mn/h7Zar6PAP9J9rS8HUWOuzad//7UpfZLYHdE7AXeT9bVtgpoA76W8nwa+Iqk/yZrdXRWxxeAe9P0Yg+2W7d49V+zKpA0OiJekjSJbNnvN6TxErMBx32kZtXx43SzYz3wrw4iNpC5RWJmZj3iMRIzM+sRBxIzM+sRBxIzM+sRBxIzM+sRBxIzM+uR/w9u2PwXCIcOAAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Re-index the counts by popularity rank (0 = most-rated movie) so the curve\n",
    "# can be drawn against rank. reset_index(drop=True) returns a new Series, so\n",
    "# movie_rating_count keeps its movieId index for the cells below.\n",
    "movie_rating_count2 = movie_rating_count.reset_index(drop=True)\n",
    "print(movie_rating_count2.head())\n",
    "# x axis: rank of the movie after re-indexing; y axis: number of ratings.\n",
    "plt.plot(movie_rating_count2.index, movie_rating_count2)\n",
    "plt.xlabel('Movie Index')\n",
    "plt.ylabel('Rating Count')\n",
    "plt.title('Movie Rating Count')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "123607    1\n",
      "90823     1\n",
      "123609    1\n",
      "123613    1\n",
      "131136    1\n",
      "Name: movieId, dtype: int64\n",
      "最大的商品id： 131262\n",
      "总打分数是： 20000263\n",
      "参与评论的电影数量： 26744\n"
     ]
    }
   ],
   "source": [
    "# The least-rated movies sit at the tail of the descending counts.\n",
    "print(movie_rating_count.tail())\n",
    "# Largest movieId that appears in the data set.\n",
    "movie_id_max = movie_rating_count.index.max()\n",
    "print('最大的商品id：', movie_id_max)\n",
    "\n",
    "# Total number of ratings, computed with the vectorized Series.sum() rather\n",
    "# than the built-in sum(), which iterates the Series element by element.\n",
    "total_rating_count = movie_rating_count.sum()\n",
    "print('总打分数是：', total_rating_count)\n",
    "\n",
    "# Number of distinct movies that received at least one rating\n",
    "# (len(movie_rating_count) gives the same value).\n",
    "movie_quantity = movie_rating_count.count()\n",
    "print('参与评论的电影数量：', movie_quantity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8.029825085551415\n"
     ]
    }
   ],
   "source": [
    "# Shannon entropy (natural log) of the rating distribution over movies:\n",
    "#   H = -sum_i p_i * ln(p_i), where p_i is movie i's share of all ratings.\n",
    "# Every count produced by value_counts() is at least 1, so ln(p_i) is finite.\n",
    "# Computed as one vectorized expression instead of a Python-level loop.\n",
    "rating_share = movie_rating_count / total_rating_count\n",
    "h = -(rating_share * np.log(rating_share)).sum()\n",
    "print(h)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9029762612602118\n"
     ]
    }
   ],
   "source": [
    "# Gini index of the rating distribution:\n",
    "#   G = sum_{j=1..n} (2j - n - 1) * p_j / (n - 1)\n",
    "# where p_j is the rating share of the j-th least popular movie and\n",
    "# n = movie_quantity. movie_rating_count is ordered from most to least rated,\n",
    "# so the movie at position `index` has rank j = n - index (j runs n, ..., 1).\n",
    "shares = movie_rating_count.to_numpy() / total_rating_count\n",
    "ranks = np.arange(movie_quantity, 0, -1)\n",
    "weights = 2 * ranks - movie_quantity - 1\n",
    "gini_index = (weights * shares).sum() / (movie_quantity - 1)\n",
    "print(gini_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.20374518139293932\n"
     ]
    }
   ],
   "source": [
    "# Coverage = recommended items / all items. In this notebook the largest\n",
    "# movieId stands in for the total number of movies, and every movie that\n",
    "# received a rating is treated as having been recommended.\n",
    "total_movies = movie_id_max\n",
    "coverage = movie_quantity / total_movies\n",
    "print(coverage)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
